{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/shenweichen/DeepMatch/blob/master/examples/colab_MovieLen1M_YoutubeDNN.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "rtox72csOQUN"
   },
   "source": [
    "# DeepMatch 样例代码\n",
    "- https://github.com/shenweichen/DeepMatch\n",
    "- https://deepmatch.readthedocs.io/en/latest/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "bTWHz-heMkyw"
   },
   "source": [
    "# 下载movielens-1M数据 安装依赖包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 510
    },
    "colab_type": "code",
    "id": "yTl6d6jO1oqf",
    "outputId": "19f1902e-17a6-4a07-ab42-f012bd286eda"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2020-04-10 09:06:19--  http://files.grouplens.org/datasets/movielens/ml-1m.zip\n",
      "Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152\n",
      "Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 5917549 (5.6M) [application/zip]\n",
      "Saving to: ‘./ml-1m.zip’\n",
      "\n",
      "\r",
      "./ml-1m.zip           0%[                    ]       0  --.-KB/s               \r",
      "./ml-1m.zip           7%[>                   ] 443.20K  1.85MB/s               \r",
      "./ml-1m.zip         100%[===================>]   5.64M  14.4MB/s    in 0.4s    \n",
      "\n",
      "2020-04-10 09:06:19 (14.4 MB/s) - ‘./ml-1m.zip’ saved [5917549/5917549]\n",
      "\n",
      "--2020-04-10 09:06:21--  https://raw.githubusercontent.com/shenweichen/DeepMatch/master/examples/preprocess.py\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 2049 (2.0K) [text/plain]\n",
      "Saving to: ‘preprocess.py’\n",
      "\n",
      "preprocess.py       100%[===================>]   2.00K  --.-KB/s    in 0s      \n",
      "\n",
      "2020-04-10 09:06:21 (34.8 MB/s) - ‘preprocess.py’ saved [2049/2049]\n",
      "\n",
      "Archive:  ml-1m.zip\n",
      "  inflating: ml-1m/movies.dat        \n",
      "  inflating: ml-1m/ratings.dat       \n",
      "  inflating: ml-1m/README            \n",
      "  inflating: ml-1m/users.dat         \n",
      "TensorFlow 1.x selected.\n",
      "\u001b[33mWARNING: Skipping tensorflow as it is not installed.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "! wget http://files.grouplens.org/datasets/movielens/ml-1m.zip -O ./ml-1m.zip \n",
    "! wget https://raw.githubusercontent.com/shenweichen/DeepMatch/master/examples/preprocess.py -O preprocess.py\n",
    "! unzip -o ml-1m.zip \n",
    "%tensorflow_version 1.x\n",
    "! pip uninstall -y -q tensorflow\n",
    "! pip install -q tensorflow-gpu==1.14.0\n",
    "! pip install -q deepmatch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "p9UxNHuPMuW2"
   },
   "source": [
    "# 导入需要的库"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 496
    },
    "colab_type": "code",
    "id": "C_ZR6gzp1E2N",
    "outputId": "8401132a-5090-464a-879d-a27492416f4c"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/initializers.py:143: calling RandomNormal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Call initializer instance with the dtype argument instead of passing it to the constructor\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from deepctr.inputs import SparseFeat, VarLenSparseFeat\n",
    "from preprocess import gen_data_set, gen_model_input\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from tensorflow.python.keras import backend as K\n",
    "from tensorflow.python.keras.models import Model\n",
    "\n",
    "from deepmatch.models import *\n",
    "from deepmatch.utils import sampledsoftmaxloss\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "fQq6O9XAMzPF"
   },
   "source": [
    "# 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 139
    },
    "colab_type": "code",
    "id": "lcO29zFb21Od",
    "outputId": "97d9023e-cbd5-43dd-d2ef-769e3a34cf2c"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:4: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  after removing the cwd from sys.path.\n",
      "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:6: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  \n",
      "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:8: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "data_path = \"./\"\n",
    "\n",
    "unames = ['user_id','gender','age','occupation','zip']\n",
    "user = pd.read_csv(data_path+'ml-1m/users.dat',sep='::',header=None,names=unames)\n",
    "rnames = ['user_id','movie_id','rating','timestamp']\n",
    "ratings = pd.read_csv(data_path+'ml-1m/ratings.dat',sep='::',header=None,names=rnames)\n",
    "mnames = ['movie_id','title','genres']\n",
    "movies = pd.read_csv(data_path+'ml-1m/movies.dat',sep='::',header=None,names=mnames)\n",
    "\n",
    "data = pd.merge(pd.merge(ratings,movies),user)#.iloc[:10000]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "L0yCWxQxM3se"
   },
   "source": [
    "# 构建特征列，训练模型，导出embedding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "colab_type": "code",
    "id": "BMOvk_de2ML3",
    "outputId": "1e43a5e7-f71c-45a4-bab4-e1d6b1a73669"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 6040/6040 [00:12<00:00, 489.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6 6\n",
      "WARNING:tensorflow:Entity <bound method SequencePoolingLayer.call of <deepctr.layers.sequence.SequencePoolingLayer object at 0x7eff6fa7b358>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method SequencePoolingLayer.call of <deepctr.layers.sequence.SequencePoolingLayer object at 0x7eff6fa7b358>>: AttributeError: module 'gast' has no attribute 'Num'\n",
      "WARNING: Entity <bound method SequencePoolingLayer.call of <deepctr.layers.sequence.SequencePoolingLayer object at 0x7eff6fa7b358>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method SequencePoolingLayer.call of <deepctr.layers.sequence.SequencePoolingLayer object at 0x7eff6fa7b358>>: AttributeError: module 'gast' has no attribute 'Num'\n",
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/deepctr/layers/utils.py:167: calling reduce_sum_v1 (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "keep_dims is deprecated, use keepdims instead\n",
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/deepctr/layers/utils.py:193: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Deprecated in favor of operator or tf.math.divide.\n",
      "WARNING:tensorflow:Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING: Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING:tensorflow:Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING: Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING:tensorflow:Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING: Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING:tensorflow:Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING: Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING:tensorflow:Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING: Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING:tensorflow:Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING: Entity <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method NoMask.call of <deepctr.layers.utils.NoMask object at 0x7eff6fa8f668>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1288: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Call initializer instance with the dtype argument instead of passing it to the constructor\n",
      "WARNING:tensorflow:Entity <bound method DNN.call of <deepctr.layers.core.DNN object at 0x7eff6fa8f128>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method DNN.call of <deepctr.layers.core.DNN object at 0x7eff6fa8f128>>: AttributeError: module 'gast' has no attribute 'Num'\n",
      "WARNING: Entity <bound method DNN.call of <deepctr.layers.core.DNN object at 0x7eff6fa8f128>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method DNN.call of <deepctr.layers.core.DNN object at 0x7eff6fa8f128>>: AttributeError: module 'gast' has no attribute 'Num'\n",
      "WARNING:tensorflow:Entity <bound method SampledSoftmaxLayer.call of <deepmatch.layers.core.SampledSoftmaxLayer object at 0x7eff6fa92c50>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method SampledSoftmaxLayer.call of <deepmatch.layers.core.SampledSoftmaxLayer object at 0x7eff6fa92c50>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "WARNING: Entity <bound method SampledSoftmaxLayer.call of <deepmatch.layers.core.SampledSoftmaxLayer object at 0x7eff6fa92c50>> could not be transformed and will be executed as-is. Please report this to the AutgoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: converting <bound method SampledSoftmaxLayer.call of <deepmatch.layers.core.SampledSoftmaxLayer object at 0x7eff6fa92c50>>: AssertionError: Bad argument number for Name: 3, expecting 4\n",
      "Epoch 1/24\n",
      "988129/988129 [==============================] - 22s 23us/sample - loss: 4.0715\n",
      "Epoch 2/24\n",
      "988129/988129 [==============================] - 21s 21us/sample - loss: 3.7855\n",
      "Epoch 3/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 3.5596\n",
      "Epoch 4/24\n",
      "988129/988129 [==============================] - 21s 21us/sample - loss: 3.4199\n",
      "Epoch 5/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 3.3216\n",
      "Epoch 6/24\n",
      "988129/988129 [==============================] - 21s 21us/sample - loss: 3.2499\n",
      "Epoch 7/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 3.1846\n",
      "Epoch 8/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 3.1264\n",
      "Epoch 9/24\n",
      "988129/988129 [==============================] - 22s 22us/sample - loss: 3.0923\n",
      "Epoch 10/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 3.0566\n",
      "Epoch 11/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 3.0336\n",
      "Epoch 12/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 3.0066\n",
      "Epoch 13/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 2.9872\n",
      "Epoch 14/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 2.9659\n",
      "Epoch 15/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 2.9476\n",
      "Epoch 16/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 2.9363\n",
      "Epoch 17/24\n",
      "988129/988129 [==============================] - 21s 21us/sample - loss: 2.9267\n",
      "Epoch 18/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 2.9179\n",
      "Epoch 19/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 2.9012\n",
      "Epoch 20/24\n",
      "988129/988129 [==============================] - 21s 21us/sample - loss: 2.8925\n",
      "Epoch 21/24\n",
      "988129/988129 [==============================] - 21s 21us/sample - loss: 2.8830\n",
      "Epoch 22/24\n",
      "988129/988129 [==============================] - 22s 22us/sample - loss: 2.8797\n",
      "Epoch 23/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 2.8711\n",
      "Epoch 24/24\n",
      "988129/988129 [==============================] - 21s 22us/sample - loss: 2.8619\n",
      "(6040, 32)\n",
      "(3706, 32)\n"
     ]
    }
   ],
   "source": [
    "#data = pd.read_csvdata = pd.read_csv(\"./movielens_sample.txt\")\n",
    "sparse_features = [\"movie_id\", \"user_id\",\n",
    "                    \"gender\", \"age\", \"occupation\", \"zip\", ]\n",
    "SEQ_LEN = 50\n",
    "negsample = 0\n",
    "\n",
    "# 1.Label Encoding for sparse features,and process sequence features with `gen_date_set` and `gen_model_input`\n",
    "\n",
    "features = ['user_id', 'movie_id', 'gender', 'age', 'occupation', 'zip']\n",
    "feature_max_idx = {}\n",
    "for feature in features:\n",
    "    lbe = LabelEncoder()\n",
    "    data[feature] = lbe.fit_transform(data[feature]) + 1\n",
    "    feature_max_idx[feature] = data[feature].max() + 1\n",
    "\n",
    "user_profile = data[[\"user_id\", \"gender\", \"age\", \"occupation\", \"zip\"]].drop_duplicates('user_id')\n",
    "\n",
    "item_profile = data[[\"movie_id\"]].drop_duplicates('movie_id')\n",
    "\n",
    "user_profile.set_index(\"user_id\", inplace=True)\n",
    "\n",
    "user_item_list = data.groupby(\"user_id\")['movie_id'].apply(list)\n",
    "\n",
    "train_set, test_set = gen_data_set(data, negsample)\n",
    "\n",
    "train_model_input, train_label = gen_model_input(train_set, user_profile, SEQ_LEN)\n",
    "test_model_input, test_label = gen_model_input(test_set, user_profile, SEQ_LEN)\n",
    "\n",
    "# 2.count #unique features for each sparse field and generate feature config for sequence feature\n",
    "\n",
    "embedding_dim = 32\n",
    "\n",
    "user_feature_columns = [SparseFeat('user_id', feature_max_idx['user_id'], 16),\n",
    "                        SparseFeat(\"gender\", feature_max_idx['gender'], 16),\n",
    "                        SparseFeat(\"age\", feature_max_idx['age'], 16),\n",
    "                        SparseFeat(\"occupation\", feature_max_idx['occupation'], 16),\n",
    "                        SparseFeat(\"zip\", feature_max_idx['zip'], 16),\n",
    "                        VarLenSparseFeat(SparseFeat('hist_movie_id', feature_max_idx['movie_id'], embedding_dim,\n",
    "                                                    embedding_name=\"movie_id\"), SEQ_LEN, 'mean', 'hist_len'),\n",
    "                        ]\n",
    "\n",
    "item_feature_columns = [SparseFeat('movie_id', feature_max_idx['movie_id'], embedding_dim)]\n",
    "\n",
    "# 3.Define Model and train\n",
    "\n",
    "K.set_learning_phase(True)\n",
    "\n",
    "import tensorflow as tf\n",
    "if tf.__version__ >= '2.0.0':\n",
    "    tf.compat.v1.disable_eager_execution()\n",
    "\n",
    "model = YoutubeDNN(user_feature_columns, item_feature_columns, num_sampled=100, user_dnn_hidden_units=(128,64, embedding_dim))\n",
    "# model = MIND(user_feature_columns,item_feature_columns,dynamic_k=False,p=1,k_max=2,num_sampled=100,user_dnn_hidden_units=(128,64, embedding_dim),init_std=0.001)\n",
    "\n",
    "model.compile(optimizer=\"adam\", loss=sampledsoftmaxloss)  # \"binary_crossentropy\")\n",
    "\n",
    "history = model.fit(train_model_input, train_label,  # train_label,\n",
    "                    batch_size=512, epochs=20, verbose=1, validation_split=0.0, )\n",
    "\n",
    "# 4. Generate user features for testing and full item features for retrieval\n",
    "test_user_model_input = test_model_input\n",
    "all_item_model_input = {\"movie_id\": item_profile['movie_id'].values,}\n",
    "\n",
    "user_embedding_model = Model(inputs=model.user_input, outputs=model.user_embedding)\n",
    "item_embedding_model = Model(inputs=model.item_input, outputs=model.item_embedding)\n",
    "\n",
    "user_embs = user_embedding_model.predict(test_user_model_input, batch_size=2 ** 12)\n",
    "# user_embs = user_embs[:, i, :]  # i in [0,k_max) if MIND\n",
    "item_embs = item_embedding_model.predict(all_item_model_input, batch_size=2 ** 12)\n",
    "\n",
    "print(user_embs.shape)\n",
    "print(item_embs.shape)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "w_G3KWslKmJo"
   },
   "source": [
    "# 使用faiss进行ANN查找并评估结果"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "5SvyQLNVKkcs"
   },
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "id": "j2ZNYNBOOqrN",
    "outputId": "2938673c-ff81-49a2-86d8-4266d2060ea3"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: faiss-cpu in /usr/local/lib/python3.6/dist-packages (1.6.3)\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from faiss-cpu) (1.18.2)\n"
     ]
    }
   ],
   "source": [
    "! pip install faiss-cpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 85
    },
    "colab_type": "code",
    "id": "6TY1l27iJU8U",
    "outputId": "5316c37f-fef1-44b3-8c31-6600d1e44da5"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "6040it [00:01, 4290.79it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "recall 0.26473509933774836\n",
      "hit rate 0.26473509933774836\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "test_true_label = {line[0]:[line[2]] for line in test_set}\n",
    "\n",
    "import numpy as np\n",
    "import faiss\n",
    "from tqdm import tqdm\n",
    "from deepmatch.utils import recall_N\n",
    "\n",
    "index = faiss.IndexFlatIP(embedding_dim)\n",
    "# faiss.normalize_L2(item_embs)\n",
    "index.add(item_embs)\n",
    "# faiss.normalize_L2(user_embs)\n",
    "D, I = index.search(np.ascontiguousarray(user_embs), 50)\n",
    "s = []\n",
    "hit = 0\n",
    "for i, uid in tqdm(enumerate(test_user_model_input['user_id'])):\n",
    "    try:\n",
    "        pred = [item_profile['movie_id'].values[x] for x in I[i]]\n",
    "        filter_item = None\n",
    "        recall_score = recall_N(test_true_label[uid], pred, N=50)\n",
    "        s.append(recall_score)\n",
    "        if test_true_label[uid] in pred:\n",
    "            hit += 1\n",
    "    except:\n",
    "        print(i)\n",
    "print(\"\")\n",
    "print(\"recall\", np.mean(s))\n",
    "print(\"hit rate\", hit / len(test_user_model_input['user_id']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "a97TB0obOrRe"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "authorship_tag": "ABX9TyOrqZNeC0DgyPX2JmJid1m7",
   "collapsed_sections": [],
   "include_colab_link": true,
   "name": "colab_MovieLen1M_YoutubeDNN",
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
